#include "stdafx.h"
#include "hnum_pdsp_defs.h"
namespace harlinn
{
    namespace numerics
    {
        namespace SuperLU
        {

            void
            pxgstrf_scheduler(const int pnum, const int n, const int *etree, 
		              int *cur_pan, int *bcol, pxgstrf_shared_Base_t *pxgstrf_shared)
            {
            /*
             * -- SuperLU MT routine (version 1.0) --
             * Univ. of California Berkeley, Xerox Palo Alto Research Center,
             * and Lawrence Berkeley National Lab.
             * August 15, 1997
             *
             * Purpose
             * =======
             *
             * pxgstrf_scheduler() gets a panel for the processor to work on. 
             * It schedules a panel in decreasing order of priority:
             *   (1) the current panel's parent, if it can be done without pipelining
             *   (2) any other panel in the queue that can be done without pipelining
             *       ("CANGO" status)
             *   (3) any other panel in the queue that can be done with pipelining
             *       ("CANPIPE" status)
             *
             * Arguments
             * =========
             * pnum    (input) int
             *         Processor number.
             *
             * n       (input) int
             *         Column dimension of the matrix.
             *
             * etree   (input) int*
             *         Elimination tree of A'*A, size n.
             *         Note: etree is a vector of parent pointers for a forest whose
             *         vertices are the integers 0 to n-1; etree[root] = n.
             *
             * cur_pan (input/output) int*
             *         On entry, the current panel just finished by this processor;
             *         On exit, [0, n-1]: the new panel to work on;
             *                  EMPTY:    failed to get any work, will try later;
             *                  n:        all panels are taken; ready to terminate.
             *
             * taskq   (input/output) queue_t*
             *         Global work queue.
             *
             * fb_cols (input/output) int*
             *         The farthest busy descendant of each (leading column of the) panel.
             *
             * bcol    (output) int*
             *         The most distant busy descendant of cur_pan in the *linear*
             *         pipeline of busy descendants. If all proper descendants of
             *         cur_pan are done, bcol is returned equal to cur_pan.
             *
             * Defining terms
             * ==============
             *   o parent(panel) = etree(last column in the panel)
             *   o the kids of a panel = collective kids of all columns in the panel
             *     kids[REP] = SUM_{j in panel} ( kids[j] ) 
             *   o linear pipeline - what does it mean in the panel context?
             *       if ukids[REP] = 0, then the panel becomes a leaf (CANGO)
             *       if ukids[REP] = 1 && ukids[firstcol] = 1, then the panel can
             *                       be taken with pipelining (CANPIPE)
             *
             * NOTES
             * =====
             *   o When a "busy" panel finishes, if its parent has only one remaining
             *     undone child there is no check to see if the parent should change
             *     from "unready" to "canpipe". Thus a few potential pipelinings will
             *     be lost, but checking out this pipeline opportunity may be costly.
             *
             */

                register int dad, dad_ukids, jcol, w, j;
                int *fb_cols = pxgstrf_shared->fb_cols;
                queue_t *taskq = &pxgstrf_shared->taskq;
                Gstat_t *Gstat = pxgstrf_shared->Gstat;
            #ifdef PROFILE
                double t;
            #endif

                jcol = *cur_pan;
                if ( jcol != EMPTY ) {
            #ifdef DOMAINS
	            if ( in_domain[jcol] == TREE_DOMAIN )
	                dad = etree[jcol];
	            else
            #endif
	                dad = DADPANEL (jcol);
                }

                /* w_top = sp_ienv(1)/2;
                   if ( w_top == 0 ) w_top = 1;*/

            #ifdef PROFILE
                TIC(t);
            #endif
            #if ( MACH==SUN )
                mutex_lock( &pxgstrf_shared->lu_locks[SCHED_LOCK] );
            #elif ( MACH==DEC || MACH==PTHREAD )
                pthread_mutex_lock( &pxgstrf_shared->lu_locks[SCHED_LOCK] );
            #elif ( MACH==SGI || MACH==ORIGIN )
            #pragma critical lock(pxgstrf_shared->lu_locks[SCHED_LOCK])
            #elif ( MACH==CRAY_PVP )
            #pragma _CRI guard SCHED_LOCK
            #elif ( MACH==OPENMP )
            #pragma omp critical ( SCHED_LOCK )
            #endif    

            {   /* ---- START CRITICAL SECTION ---- */
    
                /* Update the status of the current panel and its parent, so that
                 * the other processors waiting on it can proceed.
                 * If all siblings are done, and dad is not busy, then take dad.
                 */
                if ( jcol != EMPTY ) { /* jcol was just finished by this processor */    
	            dad_ukids = --pxgstrf_shared->pan_status[dad].ukids;
	
            #ifdef DEBUG
	            printf("(%d) DONE %d in Scheduler(), dad %d, STATE %d, dad_ukids %d\n",
	                   pnum, jcol, dad, STATE(dad), dad_ukids);
            #endif	

	            if ( dad_ukids == 0 && STATE( dad ) > BUSY ) { /* dad not started */
	                jcol = dad;
            #ifdef DEBUG
	                printf("(%d) Scheduler[1] Got dad %d, STATE %d\n",
		               pnum, jcol, STATE(dad));
            #endif
            #ifdef PROFILE
	                ++(Gstat->panhows[DADPAN]);
            #endif	    
	            } else {
	                /* Try to get a panel from the task Q. */
	                while ( 1 ) {
		            /*>>if ( (j = Dequeue(taskq, &item)) == EMPTY ) {*/
		            if ( taskq->count <= 0 ) {
		                jcol = EMPTY;
		                break;
		            } else {
		                jcol = taskq->queue[taskq->head++];
		                --taskq->count;
		                if ( STATE( jcol ) >= CANGO ) { /* CANGO or CANPIPE */
            #ifdef DEBUG
			            printf("(%d) Dequeue[1] Got %d, STATE %d, Qcount %d\n",
			                   pnum, jcol, STATE(jcol), j);
            #endif
            #ifdef PROFILE
			            if (STATE( jcol ) == CANGO) ++(Gstat->panhows[NOPIPE]);
			            else ++(Gstat->panhows[PIPE]);
            #endif			
		                    break;
		                }
		            }
	                } /* while */
	            }
                } else {
	            /*
	             * jcol was EMPTY; Try to get a panel from the task Q.
	             */
    	            while ( 1 ) {
    	                /*>>if ( (j = Dequeue(taskq, &item)) == EMPTY ) {*/
	                if ( taskq->count <= 0 ) {
		            jcol = EMPTY;
		            break;
	                } else {
		            jcol = taskq->queue[taskq->head++];
		            --taskq->count;
		            if ( STATE( jcol ) >= CANGO ) { /* CANGO or CANPIPE */
            #ifdef DEBUG
		                printf("(%d) Dequeue[2] Got %d, STATE %d, Qcount %d\n",
			               pnum, jcol, STATE(jcol), j);
            #endif
            #ifdef PROFILE
		                if (STATE( jcol ) == CANGO) ++(Gstat->panhows[NOPIPE]);
		                else ++(Gstat->panhows[PIPE]);
            #endif			
		                break;
		            }
	                }
	            } /* while */
                }
    
                /*
                 * Update the status of the new panel "jcol" and its parent "dad".
                 */
                if ( jcol != EMPTY ) {
	                --pxgstrf_shared->tasks_remain;
            #ifdef DOMAINS
	            if ( in_domain[jcol] == TREE_DOMAIN ) {
	                /* Dequeue the first descendant of this domain */
	                *bcol = taskq->queue[taskq->head++];
	                --taskq->count;
	            } else
            #endif
	            {
	                STATE( jcol ) = BUSY;
	                w = pxgstrf_shared->pan_status[jcol].size;

	                for (j = jcol; j < jcol+w; ++j) pxgstrf_shared->spin_locks[j] = 1;
	                dad = DADPANEL (jcol);
	                if ( dad < n && pxgstrf_shared->pan_status[dad].ukids == 1 ) {
		            STATE( dad ) = CANPIPE;
		            /*>> j = Enqueue(taskq, dad);*/
		            taskq->queue[taskq->tail++] = dad;
		            ++taskq->count;
            #ifdef DEBUG
		            printf("(%d) Enqueue() %d's dad %d ->CANPIPE, Qcount %d\n",
		                   pnum, jcol, dad, j);
            #endif
	                }

            #ifdef PROFILE
	                Gstat->procstat[pnum].panels++;
            #endif
	
	                /* Find the farthest busy descendant of the new panel
	                   and its parent.*/
	                *bcol = fb_cols[jcol];
            #ifdef DEBUG
	                printf("(%d) Scheduler[2] fb_cols[%d]=%d, STATE %d\n",
		               pnum, jcol, *bcol, STATE( *bcol ));
            #endif
	                while ( STATE( *bcol ) == DONE ) *bcol = DADPANEL (*bcol);
	                fb_cols[dad] = *bcol;
	
	            } /* else regular_panel */

                } /* if jcol != empty */

                *cur_pan = jcol;

            #ifdef DEBUG
                printf("(%d) Exit C.S. tasks_remain %d, cur_pan %d\n", 
	               pnum, pxgstrf_shared->tasks_remain, jcol);
            #endif

            } /* ---- END CRITICAL SECTION ---- */
    
            #if ( MACH==SUN )
                /* Exit C.S. */
                mutex_unlock( &pxgstrf_shared->lu_locks[SCHED_LOCK] );
            #elif ( MACH==DEC || MACH==PTHREAD )
                pthread_mutex_unlock( &pxgstrf_shared->lu_locks[SCHED_LOCK] );
            #elif ( MACH==CRAY_PVP )
            #pragma _CRI endguard SCHED_LOCK
            #endif    

            #ifdef PROFILE
                Gstat->procstat[pnum].cs_time += SuperLU_timer_() - t;
            #endif

                return;
            }


            /* Fix the order of the panels to be taken. */
            void
            Preorder(const int pnum, const int n, const int *etree, int *cur_pan,
                     queue_t *taskq, int *fb_cols, int *bcol,
	             pxgstrf_shared_Base_t *pxgstrf_shared)
            {
                register int w, dad, dad_ukids;

            #undef POSTORDER
            #ifdef POSTORDER
                if ( *cur_pan == EMPTY ) {
	            *cur_pan = 0;
                } else {
	            w = pxgstrf_shared->pan_status[*cur_pan].size;
	            *cur_pan += w;
                }
            #else /* Breadth-first bottom up */
                if ( *cur_pan != EMPTY ) {
	            dad = DADPANEL (*cur_pan);
	            dad_ukids = --pxgstrf_shared->pan_status[dad].ukids;
	            if ( dad_ukids == 0 ) {
	                taskq->queue[taskq->tail++] = dad;
	                ++taskq->count;
	            }
                }
                *cur_pan = taskq->queue[taskq->head++];
                --taskq->count;
            #endif
                --pxgstrf_shared->tasks_remain;
                *bcol = *cur_pan;
            }
        };
    };
};

